import pandas as pd
import numpy as np
import statsmodels.api as sm

# Define the file path to the research project's replication package 
filepath = r'C:\Users\mehran.azimi\\'

# output file name
file_name='IS_multivar'

BSsize=10000

def RegAndStats():
    #in-sample and bootstrap 
    data_IS_m=data_IS[[depvar,'yearmo','monthnum']+indepvars]
    data_IS_m=data_IS_m.dropna().reset_index()
    for var in [depvar]+indepvars:
        data_IS_m[var]=(data_IS_m[var]-data_IS_m[var].mean())/data_IS_m[var].std(ddof=0) 
    begdate=data_IS_m['yearmo'].iloc[0]
    enddate=data_IS_m['yearmo'].iloc[-1]
    firstmonthnum=data_IS_m[data_IS_m['yearmo']==begdate].index.values[0]
    lastmonth=data_IS_m[data_IS_m['yearmo']==enddate].index.values[0]
    initsamplesize= int((lastmonth - firstmonthnum)/2)+1
    nobservations=lastmonth-firstmonthnum+1
    nforecasts=nobservations - taw
    x1=np.array(data_IS_m[indepvars[0]][firstmonthnum:lastmonth+1-taw].astype(float))
    x2=np.array(data_IS_m[indepvars[1]][firstmonthnum:lastmonth+1-taw].astype(float))
    x=np.hstack((x1.reshape(len(x1),1),x2.reshape(len(x1),1)))
    X = sm.add_constant(x)
    Y=np.array(data_IS_m[depvar][firstmonthnum+taw:lastmonth+1])
    modelA1 = sm.OLS(Y, X)  # Huang et al 2014
    resultsA1 = modelA1.fit()  
    resultsA1NW = modelA1.fit( cov_type='HAC',cov_kwds={'maxlags':12} )
    ehat= Y - ( resultsA1._results.params[0] + resultsA1._results.params[1]*x1 + resultsA1._results.params[2]*x2)
    ymean=Y.mean()
    R2=resultsA1._results.rsquared; R2adj=resultsA1._results.rsquared_adj
    beta=resultsA1._results.params[1:]; t_stat= resultsA1NW._results.tvalues[1:]
        # AR(1) for independent variable 1
    x1=np.array(data_IS_m[indepvars[0]][firstmonthnum:lastmonth].astype(float))
    X = sm.add_constant(x1)
    y=np.array(data_IS_m[indepvars[0]][firstmonthnum+1:lastmonth+1].astype(float))
    modelA2_1 = sm.OLS(y, X)
    resultsA2_1 = modelA2_1.fit()
        #to calculate A4 in Huang et al, first calculate reduced biased estimates
        #calculate reduced bias estimates.
        #following Amihud and Hurvich method and notations
    rohat1 = resultsA2_1._results.params[1]
    rohat_corrected1 = rohat1 + (1+3*rohat1)/len(y) + 3*(1+3*rohat1)/len(y) **2
        #Now calculate A4
    phihat1= y - ( resultsA2_1._results.params[0] + rohat_corrected1*x1 )
        # AR(1) for independent variable 2
    x2=np.array(data_IS_m[indepvars[1]][firstmonthnum:lastmonth].astype(float))
    X = sm.add_constant(x2)
    y=np.array(data_IS_m[indepvars[1]][firstmonthnum+1:lastmonth+1].astype(float))
    modelA2_2 = sm.OLS(y, X)
    resultsA2_2 = modelA2_2.fit()
        #to calculate A4 in Huang et al, first calculaye reduced biased estimates
        #calculate reduced bias estimates.
        #following Amihud and Hurvich method and notations
    rohat2 = resultsA2_2._results.params[1]
    rohat_corrected2 = rohat2 + (1+3*rohat2)/len(y) + 3*(1+3*rohat2)/len(y) **2
        #Now calculate A4
    phihat2= y - ( resultsA2_2._results.params[0] + rohat_corrected2*x2 )
        #create pseudo sample and run Bootstrap  
    beta1_BStrap=np.array([]); beta2_BStrap=np.array([])
    tstat1_BStrap=np.array([]); tstat2_BStrap=np.array([])
    for b in range(BSsize):
        Wt1=np.random.normal(size=(len(ehat)))
        rtilda=ymean + ehat*Wt1
        xtilda1=np.array([x1[0]]); xtilda2=np.array([x2[0]])
        for i in range(1,len(rtilda)):
            xtilda1=np.append(xtilda1, resultsA2_1._results.params[0] + rohat_corrected1* xtilda1[i-1] + phihat1[i-1]*Wt1[i-1] )
            xtilda2=np.append(xtilda2, resultsA2_2._results.params[0] + rohat_corrected2* xtilda2[i-1] + phihat2[i-1]*Wt1[i-1] )
        modelBStrap=sm.OLS(rtilda,sm.add_constant(  np.hstack((xtilda1.reshape(len(xtilda1),1),xtilda2.reshape(len(xtilda1),1)))    ))
        resultsBStrap = modelBStrap.fit(cov_type='HAC',cov_kwds={'maxlags':12})
        beta1_BStrap=np.append(beta1_BStrap, resultsBStrap._results.params[1])
        beta2_BStrap=np.append(beta2_BStrap, resultsBStrap._results.params[2])
        tstat1_BStrap=np.append(tstat1_BStrap,resultsBStrap._results.tvalues[1])
        tstat2_BStrap=np.append(tstat2_BStrap,resultsBStrap._results.tvalues[2])
    if beta[0]<0:
        p_value1=len(tstat1_BStrap[tstat1_BStrap < resultsA1NW._results.tvalues[1]  ]) / len(tstat1_BStrap) ; p_value1
    else:
        p_value1=len(tstat1_BStrap[tstat1_BStrap > resultsA1NW._results.tvalues[1]  ]) / len(tstat1_BStrap) ; p_value1
    if beta[1]<0:
        p_value2=len(tstat2_BStrap[tstat2_BStrap < resultsA1NW._results.tvalues[2]  ]) / len(tstat2_BStrap) ; p_value2
    else:
        p_value2=len(tstat2_BStrap[tstat2_BStrap > resultsA1NW._results.tvalues[2]  ]) / len(tstat2_BStrap) ; p_value2
    if p_value1<0.01:
        sig='***'
    elif p_value1<0.05:
        sig='**'
    elif p_value1<0.1:
        sig='*'
    else:
        sig=''
    beta_sig1 = str(round(beta[0],2))+sig
    
    if p_value2<0.01:
        sig='***'
    elif p_value2<0.05:
        sig='**'
    elif p_value2<0.1:
        sig='*'
    else:
        sig=''
    beta_sig2 = str(round(beta[1],2))+sig
    resultsIS.loc[len(resultsIS.index)]=[begdate,enddate,depvar,indepvars[0],indepvars[1], beta[0], t_stat[0], p_value1, beta[1], t_stat[1], p_value2, R2, R2adj ,beta_sig1, beta_sig2 ]

# Load data 
data_IS=pd.read_csv(filepath + 'AllData.csv')
data_IS=data_IS.reset_index()

for var in ['alpha','usdp','bw_dyn_2021','RNS','skvw','ra_bex']: 
     data_IS[var]=(data_IS[var]-data_IS[var].mean())/data_IS[var].std(ddof=0)      

resultsIS=pd.DataFrame(columns=['begdate','enddate','depvar','indepvar1','indepvar2', 'beta1', 't_stat1', 'p_v_Bootstrap1','beta2', 't_stat2', 'p_v_Bootstrap2','R2','R2adj' ,'beta1_sig','beta2_sig' ])
taw=1
for dep in ['logMktRF','Sharpe_1']:
    for var in [ ['alpha','usdp'], ['alpha','bw_dyn_2021'],['alpha','RNS'], ['alpha','skvw'],['alpha','ra_bex'] ]:
        indepvars=var
        depvar=dep
        RegAndStats()
        print(dep, var)

resultsIS.to_excel(filepath + file_name+'.xlsx', index=False)
   
